# Load the crime records. The file is read as Latin-1 rather than the
# pandas default encoding.
path = "crime.csv"
df = pd.read_csv(
    filepath_or_buffer=path,
    encoding='latin-1',
)
# Preview the first five rows.
df.head(n=5)
| | INCIDENT_NUMBER | OFFENSE_CODE | OFFENSE_CODE_GROUP | OFFENSE_DESCRIPTION | DISTRICT | REPORTING_AREA | SHOOTING | OCCURRED_ON_DATE | YEAR | MONTH | DAY_OF_WEEK | HOUR | UCR_PART | STREET | Lat | Long | Location |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | I182070945 | 619 | Larceny | LARCENY ALL OTHERS | D14 | 808 | NaN | 2018-09-02 13:00:00 | 2018 | 9 | Sunday | 13 | Part One | LINCOLN ST | 42.357791 | -71.139371 | (42.35779134, -71.13937053) |
| 1 | I182070943 | 1402 | Vandalism | VANDALISM | C11 | 347 | NaN | 2018-08-21 00:00:00 | 2018 | 8 | Tuesday | 0 | Part Two | HECLA ST | 42.306821 | -71.060300 | (42.30682138, -71.06030035) |
| 2 | I182070941 | 3410 | Towed | TOWED MOTOR VEHICLE | D4 | 151 | NaN | 2018-09-03 19:27:00 | 2018 | 9 | Monday | 19 | Part Three | CAZENOVE ST | 42.346589 | -71.072429 | (42.34658879, -71.07242943) |
| 3 | I182070940 | 3114 | Investigate Property | INVESTIGATE PROPERTY | D4 | 272 | NaN | 2018-09-03 21:16:00 | 2018 | 9 | Monday | 21 | Part Three | NEWCOMB ST | 42.334182 | -71.078664 | (42.33418175, -71.07866441) |
| 4 | I182070938 | 3114 | Investigate Property | INVESTIGATE PROPERTY | B3 | 421 | NaN | 2018-09-03 21:05:00 | 2018 | 9 | Monday | 21 | Part Three | DELHI ST | 42.275365 | -71.090361 | (42.27536542, -71.09036101) |
# Build the profiling report for the whole frame and leave it as the
# cell's final expression so the notebook renders it.
profile = df.profile_report()
profile
# Distribution of the log-transformed offense codes, drawn with a KDE
# curve and a rug of the individual observations.
plt.figure(figsize=(16, 10))
sns.distplot(np.log(df.OFFENSE_CODE), kde=True, rug=True)
# plt.show has to be *called*: the bare name only evaluates to the
# function object (which the notebook then echoes) and renders nothing.
plt.show()
<function matplotlib.pyplot.show(*args, **kw)>
# Plot the first 1000 incident locations as individual markers.
#
# Rows missing either coordinate are dropped *together*: calling dropna()
# on Lat and Long separately would shift the two series against each
# other whenever only one of the two values is missing, pairing a
# latitude with the longitude of a different incident.
coords = df[["Lat", "Long"]].dropna()
locs = list(zip(coords.Lat, coords.Long))

# Map centre given as numeric latitude/longitude.
m = folium.Map(location=[42.3600825, -71.0588801])
for lat, lng in locs[:1000]:
    # add_to() is the supported way to attach an element to a map;
    # add_children() is the old, deprecated spelling.
    folium.Marker((lat, lng)).add_to(m)
m
# Heat map of the first 30000 incident locations.
#
# Rows missing either coordinate are dropped *together* so every latitude
# stays paired with the longitude of the same incident (independent
# dropna() calls on each column can misalign the pairs).
coords = df[["Lat", "Long"]].dropna()
locs = list(zip(coords.Lat, coords.Long))
print(type(locs))

heat_data = [[lat, lng] for lat, lng in locs[:30000]]
map_ = folium.Map(location=[42.3600825, -71.0588801], zoom_start=11)
# NOTE(review): auto_play looks like a HeatMapWithTime option and
# max_opacity is not a named HeatMap parameter -- both are kept as-is;
# confirm the installed folium version accepts/uses them.
hm = plugins.HeatMap(heat_data, auto_play=True, max_opacity=0.4)
hm.add_to(map_)
map_
<class 'list'>
# Heat map of the first 20 incidents flagged as shootings.
#
# NOTE(review): the sample rows only show NaN in SHOOTING -- confirm that
# the flag value really is 1 (and not e.g. a string marker), otherwise
# this selection is empty.
#
# Rows without coordinates are dropped before plotting because HeatMap
# refuses data containing NaNs.
shootings = df.loc[df.SHOOTING == 1, ["Lat", "Long"]].dropna()
data = shootings.values

heat_data = [[lat, lng] for lat, lng in data[:20]]
map_ = folium.Map(location=[42.3600825, -71.0588801], zoom_start=11)
# NOTE(review): auto_play / max_opacity are kept from the original call;
# confirm the installed folium HeatMap accepts them.
hm = plugins.HeatMap(heat_data, auto_play=True, max_opacity=0.4)
hm.add_to(map_)
map_
# Scatter of incident coordinates coloured by police district.
# Only rows with Lat > 1 are plotted (presumably to skip placeholder
# coordinates -- TODO confirm). The very low alpha lets dense areas stand
# out through overplotting.
located = df[df['Lat'] > 1]
plt.figure(figsize=(16, 10))
sns.scatterplot(
    data=located,
    x='Lat',
    y='Long',
    hue='DISTRICT',
    alpha=0.01,
)
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc=2, bbox_to_anchor=(1.05, 1))
<matplotlib.legend.Legend at 0x13dc32d30>
# Share of recorded incidents per district, drawn as a pie chart.
crime_per_district = df["DISTRICT"].value_counts()

wedge_colors = sns.color_palette("husl", 13)
plt.figure(figsize=(10, 10))
plt.pie(
    crime_per_district.values,
    labels=crime_per_district.index,
    colors=wedge_colors,
    autopct='%1.1f%%',
    startangle=90,
)
plt.title('Repartition of crimes by Districts')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc=2, bbox_to_anchor=(1.05, 1))
plt.show()
# Horizontal count plot of incidents per offense code group, with the
# groups ordered from most to least frequent.
sns.catplot(
    y="OFFENSE_CODE_GROUP",
    kind='count',
    height=8,
    aspect=2,
    order=df.OFFENSE_CODE_GROUP.value_counts().index,
    data=df,
)
# The categories are on the y-axis (y="OFFENSE_CODE_GROUP"), so the
# counts run along x; the labels were previously attached to the wrong
# axes.
plt.xlabel('Count', fontsize=12)
plt.ylabel('OFFENSE CODE ', fontsize=12)
plt.title("Number of crime per code", fontsize=20)
Text(0.5, 1.0, 'Number of crime per code')
# Count plot of incidents per year.
sns.catplot(x="YEAR", kind='count', data=df, aspect=2)
# The x-axis shows YEAR; it was previously mislabelled 'Month'.
plt.xlabel('Year', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.title("Number of crime per Year", fontsize=20)
plt.show()
# Count plot of incidents per "Day", with categories laid out in the
# order given by col_order.
# NOTE(review): neither a "Day" column nor col_order is defined in the
# visible part of the notebook -- both are assumed to be created
# elsewhere; confirm before running.
sns.catplot(
    data=df,
    x="Day",
    order=col_order,
    kind='count',
    height=7,
    aspect=3,
)
plt.ylabel(ylabel='Count', fontsize=12)
plt.xlabel(xlabel='Day', fontsize=12)
plt.title(label="Number of crime per day", fontsize=20)
plt.show()
# Count plot of incidents per value of the HOUR column, on a wide canvas.
sns.catplot(
    data=df,
    x="HOUR",
    kind='count',
    height=8.27,
    aspect=3,
)
plt.ylabel(ylabel='Count', fontsize=12)
plt.xlabel(xlabel='Hour', fontsize=12)
plt.title(label="Number of crime per Hour", fontsize=20)
plt.show()
# Count plot of incidents per value of the MONTH column.
sns.catplot(
    data=df,
    x="MONTH",
    kind='count',
    aspect=2,
)
plt.ylabel(ylabel='Count', fontsize=12)
plt.xlabel(xlabel='Month', fontsize=12)
plt.title(label="Number of crime per months", fontsize=20)
plt.show()
# Attempt to draw a single distribution plot from five columns at once.
plt.figure(figsize=(16,10))
# Pull the selected columns into one 2-D NumPy array (one row per incident).
# NOTE(review): in the sample rows INCIDENT_NUMBER (e.g. "I182070945") and
# Location (e.g. "(42.35779134, -71.13937053)") are strings, so this array
# mixes text with numbers -- confirm that distplot can actually consume it,
# and which single numeric column was meant to be plotted here.
data= np.array(df[["INCIDENT_NUMBER", "OFFENSE_CODE", "YEAR", "MONTH", "Location"]])
# Debug aid: print the container type of `data`.
print(type(data))
sns.distplot(data)
plt.show()